# Directory holding the .rds inputs read further down.
# The trailing slash is required: file names are appended with paste0().
data_dir <- "/Users/frd2007/Documents/Projects/2020-09_CZI_JamesJingli/Jingli/data/"

All cells

# Read the stored object and replace its row names with unique feature
# names built from the ID and Symbol columns of the row annotation.
scf2 <- readRDS(
  file = paste0(data_dir, "sce_EryFiltered_ZF_sampleIntegration_2022-01-14_noCounts.rds")
)
rownames(scf2) <- scater::uniquifyFeatureNames(
  ID = rowData(scf2)$ID,
  names = rowData(scf2)$Symbol
)

# Collapse the detailed cell labels into coarse categories.
#
# Each rule is a regular expression (name) mapped to the coarse label (value).
# Rules are listed in priority order: if several patterns match the same
# label, the one listed first wins. Labels matching no rule are kept, except
# that "-cells" is shortened to "cell" and a trailing "cells" is dropped.
#
# The label column is converted with as.character() first so that unmatched
# labels stay readable text even if the column is stored as a factor (a nested
# ifelse() would fall through to the factor's integer codes in that case).
scf2$old_labels_coarse <- local({
  rules <- c(
    "Macrophage"    = "M0",
    "T-cells"       = "Tcell",
    "Fibroblast"    = "Fibroblast",
    "Cardiomyocyte" = "CM",
    "Endocardium"   = "Endocard.",
    "Epicardium"    = "Epicard.",
    "Ery"           = "Ery",
    "eutroph"       = "Neutrophils",
    "delta"         = "delta",
    "EC_"           = "EC",
    "ndothelial"    = "EC"
  )
  detailed <- as.character(scf2$label)
  coarse <- detailed
  # Apply the rules from last to first so that higher-priority rules
  # overwrite lower-priority ones (i.e. the first listed match wins).
  for (pattern in rev(names(rules))) {
    coarse[grepl(pattern, detailed)] <- rules[[pattern]]
  }
  coarse <- gsub("-cells", "cell", coarse)
  gsub("cells$", "", coarse)
})
# Marker gene table: headerless text file, one (cell type, gene symbol) pair per row.
mks <- fread(
  "~/Documents/Projects/2020-09_CZI_JamesJingli/Jingli/2022-02_ZFMarkerGeneComparison/zf_markerGeneList.txt",
  header = FALSE
)
# Rename all columns in place.
setnames(mks, c("cellType", "symbol"))
# UMAP of the cells whose coarse label does not start with "Heart",
# colored by that coarse label.
scABC2::plot_reducedDim_from_sce(
  scf2[, !grepl("^Heart", scf2$old_labels_coarse)],
  color_by = "old_labels_coarse",
  which_reddim = "UMAP",
  alpha = .3,
  size_by = .5,
  remove_rug = TRUE,
  # label_by = "cluster_k200",
  add_cell_info = c("Tissue", "post.surgery")
)
## Warning: Removed 256 rows containing missing values (geom_point).

# UMAP of every cell in scf2, colored by the cluster_k150 assignment.
scABC2::plot_reducedDim_from_sce(
  scf2,
  color_by = "cluster_k150",
  which_reddim = "UMAP",
  alpha = .3,
  size_by = .5,
  remove_rug = TRUE,
  # label_by = "cluster_k200",
  add_cell_info = c("Tissue", "post.surgery")
)

Cardiomyocytes

> mks$symbol[!mks$symbol %in% rowData(scf2)$Symbol]
[1] "myl7"   "ttn.1"  "tnnt2a" "cmlc1" 
# UMAP of all cells, colored by the cardiomyocyte marker genes.
# Markers are filtered against rownames(scf2), not rowData(scf2)$Symbol:
# the row names were replaced by uniquified feature names, so a symbol that
# occurs more than once in the annotation is not a valid row name on its own.
# This also matches the filtering used by the other marker plots in this file.
plot_reducedDim_from_sce(
  scf2,
  which_reddim = "UMAP",
  exprs_values = "logcounts",
  color_by = mks[cellType == "cardiomyocytes" & symbol %in% rownames(scf2), symbol]
) +
  ggtitle("CM marker gene expression")

Epicardial

# UMAP of all cells, colored by the epicardial marker genes.
# Markers are filtered against rownames(scf2), not rowData(scf2)$Symbol:
# the row names were replaced by uniquified feature names, so a symbol that
# occurs more than once in the annotation is not a valid row name on its own.
plot_reducedDim_from_sce(
  scf2,
  which_reddim = "UMAP",
  exprs_values = "logcounts",
  set_color = FALSE,
  size_by = .3,
  alpha = .3,
  color_by = mks[cellType == "epicardial" & symbol %in% rownames(scf2), symbol]
) +
  ggtitle("Epicardial marker gene expression") +
  scale_color_viridis_c(direction = -1)

Cholangiocytes

# UMAP of all cells, colored by the cholangiocyte marker genes.
# Markers are filtered against rownames(scf2), not rowData(scf2)$Symbol:
# the row names were replaced by uniquified feature names, so a symbol that
# occurs more than once in the annotation is not a valid row name on its own.
plot_reducedDim_from_sce(
  scf2,
  which_reddim = "UMAP",
  exprs_values = "logcounts",
  set_color = FALSE,
  size_by = .3,
  alpha = .3,
  color_by = mks[cellType == "cholangiocytes" & symbol %in% rownames(scf2), symbol]
) +
  ggtitle("Cholangiocytes marker gene expression") +
  scale_color_viridis_c(direction = -1)

Endothelial

# UMAP of all cells, colored by the endothelial marker genes.
# Markers are filtered against rownames(scf2), not rowData(scf2)$Symbol:
# the row names were replaced by uniquified feature names, so a symbol that
# occurs more than once in the annotation is not a valid row name on its own.
plot_reducedDim_from_sce(
  scf2,
  which_reddim = "UMAP",
  exprs_values = "logcounts",
  set_color = FALSE,
  size_by = .3,
  alpha = .3,
  color_by = mks[cellType == "endothelial" & symbol %in% rownames(scf2), symbol]
) +
  ggtitle("Endothelial marker gene expression") +
  scale_color_viridis_c(direction = -1)

Hepatocytes

# UMAP of all cells, colored by the hepatocyte markers found among the row names.
plot_reducedDim_from_sce(
  scf2,
  color_by = mks[cellType == "hepatocytes" & symbol %in% rownames(scf2), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Hepatocyte marker gene expression") +
  scale_color_viridis_c(direction = -1)

There’s some enrichment in the endothelial cluster; let’s see which genes are particularly “offending”:

# One UMAP per hepatocyte marker (each titled with its gene), then all
# panels are handed to MultiPlotList for joint display.
pl <- lapply(
  mks[cellType == "hepatocytes" & symbol %in% rownames(scf2), symbol],
  function(marker) {
    plot_reducedDim_from_sce(
      scf2,
      color_by = marker,
      exprs_values = "logcounts",
      which_reddim = "UMAP",
      set_color = FALSE,
      alpha = .3,
      size_by = .3
    ) +
      ggtitle(marker) +
      scale_color_viridis_c(direction = -1)
  }
)
ABCutilities::MultiPlotList(pl, title = "Individual hepatocyte markers")

Expression of cp seems more restricted than that of fabp10a and tfa.

Hepatic stellate

# UMAP of all cells, colored by the hepatic stellate markers found among the row names.
plot_reducedDim_from_sce(
  scf2,
  color_by = mks[cellType == "hepatic_stellate" & symbol %in% rownames(scf2), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Hepatic stellate marker gene expression") +
  scale_color_viridis_c(direction = -1)

Lymphatic vessel

# UMAP of all cells, colored by the lymphatic vessel markers found among the row names.
plot_reducedDim_from_sce(
  scf2,
  color_by = mks[cellType == "lymphatic_vessel_cells" & symbol %in% rownames(scf2), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Lymph vessel marker gene expression") +
  scale_color_viridis_c(direction = -1)

HSCsHSPCsThrombocytesMegakaryocytes

# UMAP of all cells, colored by the HSC/HSPC/thrombocyte/megakaryocyte
# markers found among the row names.
plot_reducedDim_from_sce(
  scf2,
  color_by = mks[
    cellType == "HSCsHSPCsThrombocytesMegakaryocytes" & symbol %in% rownames(scf2),
    symbol
  ],
  exprs_values = "logcounts",
  which_reddim = "UMAP",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("HSC/HSPC/Thrombo/Megakary marker gene expression") +
  scale_color_viridis_c(direction = -1)

# One UMAP per HSC/HSPC/thrombocyte/megakaryocyte marker (each titled with
# its gene), then all panels are handed to MultiPlotList for joint display.
pl <- lapply(
  mks[
    cellType == "HSCsHSPCsThrombocytesMegakaryocytes" & symbol %in% rownames(scf2),
    symbol
  ],
  function(marker) {
    plot_reducedDim_from_sce(
      scf2,
      color_by = marker,
      exprs_values = "logcounts",
      which_reddim = "UMAP",
      set_color = FALSE,
      alpha = .3,
      size_by = .3
    ) +
      ggtitle(marker) +
      scale_color_viridis_c(direction = -1)
  }
)
ABCutilities::MultiPlotList(pl, title = "Individual HSCsHSPCsThrombocytesMegakaryocytes markers")

Kidney tubule

# UMAP of all cells, colored by the proximal/distal tubule markers found among the row names.
plot_reducedDim_from_sce(
  scf2,
  color_by = mks[cellType == "Proximal_distal_tubule_cells" & symbol %in% rownames(scf2), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Proximal/distal tubule marker gene expression") +
  scale_color_viridis_c(direction = -1)

Kidney Nephron

# UMAP of all cells, colored by the nephron epithelial markers found among the row names.
plot_reducedDim_from_sce(
  scf2,
  color_by = mks[cellType == "Nephron_epithelial_cells" & symbol %in% rownames(scf2), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Nephron epith. marker gene expression") +
  scale_color_viridis_c(direction = -1)

Mucin-secreting cells

# UMAP of all cells, colored by the mucin-secreting cell markers found among the row names.
plot_reducedDim_from_sce(
  scf2,
  color_by = mks[cellType == "MucinSecretingCells" & symbol %in% rownames(scf2), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Mucin-secreting cells") +
  scale_color_viridis_c(direction = -1)

Erythrocytes

# UMAP of all cells, colored by the erythrocyte markers found among the row names.
plot_reducedDim_from_sce(
  scf2,
  color_by = mks[cellType == "Erythrocytes" & symbol %in% rownames(scf2), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Ery marker gene expression") +
  scale_color_viridis_c(direction = -1)

Re-clustered cluster9_10only

# Read the object that carries the additional re-clustering and give it
# unique feature names as row names.
sce <- readRDS(
  file = paste0(data_dir, "sce_EryFiltered_ZF_sampleIntegration_AddReclustering_2022-01-27_noCounts.rds")
)
rownames(sce) <- scater::uniquifyFeatureNames(
  ID = rowData(sce)$ID,
  names = rowData(sce)$Symbol
)

# Plot only the cells that have a re-clustering assignment, colored by the
# new cluster and labelled with the coarse labels; legend goes below the plot.
scABC2::plot_reducedDim_from_sce(
  sce[, !is.na(sce$integK100Clust910Only_k50)],
  color_by = "integK100Clust910Only_k50",
  label_by = "old_labels_coarse",
  which_reddim = "UMAP_cluster9_10only",
  alpha = .3,
  size_by = .5,
  remove_rug = TRUE,
  add_cell_info = c("Tissue", "post.surgery")
) +
  theme(legend.position = "bottom")

Cardiomyocytes

# Re-clustered UMAP, colored by the cardiomyocyte marker genes.
# Markers are filtered against rownames(sce), not rowData(sce)$Symbol:
# the row names were replaced by uniquified feature names, so a symbol that
# occurs more than once in the annotation is not a valid row name on its own.
# This also matches the filtering used by the other marker plots in this file.
plot_reducedDim_from_sce(
  sce,
  which_reddim = "UMAP_cluster9_10only",
  exprs_values = "logcounts",
  color_by = mks[cellType == "cardiomyocytes" & symbol %in% rownames(sce), symbol]
) +
  ggtitle("CM marker gene expression")
## Warning: Removed 38290 rows containing missing values (geom_point).

Epicardial

# Re-clustered UMAP, colored by the epicardial marker genes.
# Markers are filtered against rownames(sce), not rowData(sce)$Symbol:
# the row names were replaced by uniquified feature names, so a symbol that
# occurs more than once in the annotation is not a valid row name on its own.
plot_reducedDim_from_sce(
  sce,
  which_reddim = "UMAP_cluster9_10only",
  exprs_values = "logcounts",
  set_color = FALSE,
  size_by = .3,
  alpha = .3,
  color_by = mks[cellType == "epicardial" & symbol %in% rownames(sce), symbol]
) +
  ggtitle("Epicardial marker gene expression") +
  scale_color_viridis_c(direction = -1)
## Warning: Removed 38290 rows containing missing values (geom_point).

Cholangiocytes

# Re-clustered UMAP, colored by the cholangiocyte marker genes.
# Markers are filtered against rownames(sce), not rowData(sce)$Symbol:
# the row names were replaced by uniquified feature names, so a symbol that
# occurs more than once in the annotation is not a valid row name on its own.
plot_reducedDim_from_sce(
  sce,
  which_reddim = "UMAP_cluster9_10only",
  exprs_values = "logcounts",
  set_color = FALSE,
  size_by = .3,
  alpha = .3,
  color_by = mks[cellType == "cholangiocytes" & symbol %in% rownames(sce), symbol]
) +
  ggtitle("Cholangiocytes marker gene expression") +
  scale_color_viridis_c(direction = -1)
## Warning: Removed 38290 rows containing missing values (geom_point).

Endothelial

# Re-clustered UMAP, colored by the endothelial marker genes.
# Markers are filtered against rownames(sce), not rowData(sce)$Symbol:
# the row names were replaced by uniquified feature names, so a symbol that
# occurs more than once in the annotation is not a valid row name on its own.
plot_reducedDim_from_sce(
  sce,
  which_reddim = "UMAP_cluster9_10only",
  exprs_values = "logcounts",
  set_color = FALSE,
  size_by = .3,
  alpha = .3,
  color_by = mks[cellType == "endothelial" & symbol %in% rownames(sce), symbol]
) +
  ggtitle("Endothelial marker gene expression") +
  scale_color_viridis_c(direction = -1)
## Warning: Removed 38290 rows containing missing values (geom_point).

Hepatocytes

# Re-clustered UMAP, colored by the hepatocyte markers found among the row names.
plot_reducedDim_from_sce(
  sce,
  color_by = mks[cellType == "hepatocytes" & symbol %in% rownames(sce), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP_cluster9_10only",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Hepatocyte marker gene expression") +
  scale_color_viridis_c(direction = -1)
## Warning: Removed 38290 rows containing missing values (geom_point).

Hepatic stellate

# Re-clustered UMAP, colored by the hepatic stellate markers found among the row names.
plot_reducedDim_from_sce(
  sce,
  color_by = mks[cellType == "hepatic_stellate" & symbol %in% rownames(sce), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP_cluster9_10only",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Hepatic stellate marker gene expression") +
  scale_color_viridis_c(direction = -1)
## Warning: Removed 38290 rows containing missing values (geom_point).

Lymphatic vessel

# Re-clustered UMAP, colored by the lymphatic vessel markers found among the row names.
plot_reducedDim_from_sce(
  sce,
  color_by = mks[cellType == "lymphatic_vessel_cells" & symbol %in% rownames(sce), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP_cluster9_10only",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Lymph vessel marker gene expression") +
  scale_color_viridis_c(direction = -1)
## Warning: Removed 38290 rows containing missing values (geom_point).

HSCsHSPCsThrombocytesMegakaryocytes

# Re-clustered UMAP, colored by the HSC/HSPC/thrombocyte/megakaryocyte
# markers found among the row names.
plot_reducedDim_from_sce(
  sce,
  color_by = mks[
    cellType == "HSCsHSPCsThrombocytesMegakaryocytes" & symbol %in% rownames(sce),
    symbol
  ],
  exprs_values = "logcounts",
  which_reddim = "UMAP_cluster9_10only",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("HSC/HSPC/Thrombo/Megakary marker gene expression") +
  scale_color_viridis_c(direction = -1)
## Warning: Removed 38290 rows containing missing values (geom_point).

# One re-clustered UMAP per HSC/HSPC/thrombocyte/megakaryocyte marker (each
# titled with its gene), then all panels are handed to MultiPlotList.
pl <- lapply(
  mks[
    cellType == "HSCsHSPCsThrombocytesMegakaryocytes" & symbol %in% rownames(sce),
    symbol
  ],
  function(marker) {
    plot_reducedDim_from_sce(
      sce,
      color_by = marker,
      exprs_values = "logcounts",
      which_reddim = "UMAP_cluster9_10only",
      set_color = FALSE,
      alpha = .3,
      size_by = .3
    ) +
      ggtitle(marker) +
      scale_color_viridis_c(direction = -1)
  }
)
ABCutilities::MultiPlotList(pl, title = "Individual HSCsHSPCsThrombocytesMegakaryocytes markers")
## Warning: Removed 38290 rows containing missing values (geom_point).

## Warning: Removed 38290 rows containing missing values (geom_point).

## Warning: Removed 38290 rows containing missing values (geom_point).

## Warning: Removed 38290 rows containing missing values (geom_point).

## Warning: Removed 38290 rows containing missing values (geom_point).

## Warning: Removed 38290 rows containing missing values (geom_point).

Kidney tubule

# Re-clustered UMAP, colored by the proximal/distal tubule markers found among the row names.
plot_reducedDim_from_sce(
  sce,
  color_by = mks[cellType == "Proximal_distal_tubule_cells" & symbol %in% rownames(sce), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP_cluster9_10only",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Proximal/distal tubule marker gene expression") +
  scale_color_viridis_c(direction = -1)
## Warning: Removed 38290 rows containing missing values (geom_point).

Kidney Nephron

# Re-clustered UMAP, colored by the nephron epithelial markers found among the row names.
plot_reducedDim_from_sce(
  sce,
  color_by = mks[cellType == "Nephron_epithelial_cells" & symbol %in% rownames(sce), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP_cluster9_10only",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Nephron epith. marker gene expression") +
  scale_color_viridis_c(direction = -1)
## Warning: Removed 38290 rows containing missing values (geom_point).

Mucin-secreting cells

# Re-clustered UMAP, colored by the mucin-secreting cell markers found among the row names.
plot_reducedDim_from_sce(
  sce,
  color_by = mks[cellType == "MucinSecretingCells" & symbol %in% rownames(sce), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP_cluster9_10only",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Mucin-secreting cells") +
  scale_color_viridis_c(direction = -1)
## Warning: Removed 38290 rows containing missing values (geom_point).

Erythrocytes

# Re-clustered UMAP, colored by the erythrocyte markers found among the row names.
plot_reducedDim_from_sce(
  sce,
  color_by = mks[cellType == "Erythrocytes" & symbol %in% rownames(sce), symbol],
  exprs_values = "logcounts",
  which_reddim = "UMAP_cluster9_10only",
  set_color = FALSE,
  alpha = .3,
  size_by = .3
) +
  ggtitle("Ery marker gene expression") +
  scale_color_viridis_c(direction = -1)
## Warning: Removed 38290 rows containing missing values (geom_point).

Identifying immune cells

…and separating them from erythrocytes

# Bare string expression: evaluating it only echoes the value.
# NOTE(review): presumably the gene used to tell erythrocytes apart from
# immune cells in what follows — confirm.
"hbaa1"
## [1] "hbaa1"